Loading library
Reading input
1. Distribution of Mosquito by Species
# Scatter map of every mosquito record from 2004 and 2013, one trace per
# species (VECTOR).
# `mode` and `hoverinfo` only accept plotly flag values; 'scattermapbox' is a
# trace type and 'COUNTRY' is a column name, so neither was a valid flag.
# The country name is now supplied through `text` and shown on hover with
# hoverinfo = "text".
mosquito_data %>%
  filter(YEAR %in% c("2004", "2013")) %>%
  select(VECTOR, COUNTRY, X, Y) %>%
  plot_mapbox(
    lon = ~X, lat = ~Y, mode = "markers",
    text = ~COUNTRY, hoverinfo = "text",
    split = ~VECTOR, width = 800, height = 800
  ) %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4)
  )
Analysis: Brazil has the highest density of ‘Aedes aegypti’ records; similarly, Taiwan has the highest density of ‘Aedes albopictus’ records. The first perception problem here is that the number of data points is easily read as being directly proportional to the mosquito population, which is not the case.
# Build a bubble map of record locations for one species in one year.
#   vector_name: value of the VECTOR column to keep.
#   year:        value of the YEAR column to keep (passed as a string, matching
#                the comparisons used elsewhere in this report).
# Returns a plotly mapbox object whose marker size encodes the number of
# records at each (COUNTRY, X, Y) location.
plot_vector_year <- function(vector_name, year) {
  mosquito_data %>%
    # `!!` forces the function arguments, so a data column with the same name
    # can never shadow them.
    filter(YEAR == (!!year), VECTOR == (!!vector_name)) %>%
    group_by(COUNTRY, X, Y) %>%
    summarise(count = n()) %>%
    ungroup() %>%
    plot_mapbox(
      lon = ~X, lat = ~Y, mode = "markers", size = ~count,
      # VECTOR is no longer a column after summarise() and 'VECTOR' is not a
      # valid hoverinfo flag; show country and count through `text` instead.
      text = ~paste0(COUNTRY, ": ", count), hoverinfo = "text"
    ) %>%
    layout(
      title = paste(vector_name, "Mosquito Population vs. Country in", year)
    )
}

p1 <- plot_vector_year("Aedes aegypti", "2004")
p2 <- plot_vector_year("Aedes aegypti", "2013")
p3 <- plot_vector_year("Aedes albopictus", "2004")
p4 <- plot_vector_year("Aedes albopictus", "2013")
p1
## Warning: `line.width` does not currently support multiple values.
p2
## Warning: `line.width` does not currently support multiple values.
p3
## Warning: `line.width` does not currently support multiple values.
p4
## Warning: `line.width` does not currently support multiple values.
Analysis: Going from 2004 to 2013, the population of ‘Aedes aegypti’ increased worldwide, especially in Brazil. However, in the case of ‘Aedes albopictus’ the population decreased. Again, this could simply be an artefact of the number of data points rather than a real change in population.
2. Geoplot of mosquitos per country
# World choropleth of the number of mosquito records per country.
# COUNTRY_ID is passed as the `locations` key and COUNTRY as the hover text.
# plotly documents its projection type names in lower case, so the previous
# 'Equirectangular' spelling was not a valid value.
mosquito_data %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  plot_geo(width = 1200, height = 800) %>%
  add_trace(
    z = ~count, text = ~COUNTRY, locations = ~COUNTRY_ID,
    color = ~count, colors = "Purples"
  ) %>%
  colorbar(title = "Mosquito Population") %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    geo = list(projection = list(type = "equirectangular"))
  )
Analysis: By rolling the data up to country level we have lost its granularity (the level of detail). Also, most countries have counts below 5K while only Brazil is close to 10K, so the remaining countries are hard to tell apart and none of them stands out.
3a. Equirectangular projection and choropleth color log(Z)
# World choropleth of log(record count) per country, equirectangular projection.
# The log transform is applied to both `z` and `color` so the fill and the
# colour bar use the same scale.
# plotly documents its projection type names in lower case, so the previous
# 'Equirectangular' spelling was not a valid value.
mosquito_data %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  plot_geo(width = 1200, height = 800) %>%
  add_trace(
    z = ~log(count), text = ~COUNTRY, locations = ~COUNTRY_ID,
    color = ~log(count), colors = "Purples"
  ) %>%
  colorbar(title = "Mosquito Population in Log") %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    geo = list(projection = list(type = "equirectangular"))
  )
3b Conic Equal Area projection
# World choropleth of log(record count) per country, conic equal-area
# projection.
# plotly documents its projection type names in lower case. The previous
# 'Conic equal area' spelling was not a valid value, so the requested
# projection was presumably never applied.
# NOTE(review): re-check the projection comparison in the analysis text after
# re-rendering this figure.
mosquito_data %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  plot_geo(width = 1200, height = 800) %>%
  add_trace(
    z = ~log(count), text = ~COUNTRY, locations = ~COUNTRY_ID,
    color = ~log(count), colors = "Purples"
  ) %>%
  colorbar(title = "Mosquito Population in Log") %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    geo = list(projection = list(type = "conic equal area"))
  )
Analysis: Using a log scale brings out more information in the chart compared to the non-log values. However, there is not much difference between the two types of projection when it comes to analysis.
4. Resolve and Map for Brazil with discrete cuts
# Bin longitude (X) and latitude (Y) into 100 equal-width intervals each;
# labels = FALSE stores the integer bin index instead of an interval label.
mosquito_data$X_cut <- cut_interval(mosquito_data$X, n = 100, labels = FALSE)
mosquito_data$Y_cut <- cut_interval(mosquito_data$Y, n = 100, labels = FALSE)

# For Brazil in 2013, collapse every (species, X bin, Y bin) cell to a single
# point at the mean coordinate and colour it by the number of records.
# The former `add_trace(z = ~count)` call was removed: scattermapbox traces
# have no `z` attribute (plotly warned about it) and the call added a second
# set of traces. `hoverinfo = 'count'` was not a valid flag either, so the
# count is now shown through `text`.
mosquito_data %>%
  filter(YEAR == "2013" & COUNTRY == "Brazil") %>%
  group_by(COUNTRY, VECTOR, X_cut, Y_cut) %>%
  summarise(X_mean = mean(X), Y_mean = mean(Y), count = n()) %>%
  ungroup() %>%
  plot_mapbox(
    lon = ~X_mean, lat = ~Y_mean, mode = "markers",
    split = ~VECTOR, color = ~count,
    text = ~paste0(VECTOR, ": ", count), hoverinfo = "text",
    width = 800, height = 800
  ) %>%
  layout(
    title = "Mosquito Population vs. Brazil",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4)
  )
Analysis: Taking the mean of the coordinates definitely helped in this case, although it reduced the level of detail. In terms of mosquito population, we find that “Patos” and “Bauru” have the highest counts of mosquitoes.
Assignment 2 Visualization of Income of Swedish Household
2. Violin Plot showing the distribution of wealth
# Violin plot of 2016 income (column X2016) per age group, one violin per
# group, with the embedded box plot and mean line switched on.
# TRUE/FALSE replace T/F, which are ordinary variables that can be reassigned.
income_data %>%
  plot_ly(
    x = ~age_group, y = ~X2016, type = "violin", split = ~age_group,
    box = list(visible = TRUE), meanline = list(visible = TRUE)
  ) %>%
  layout(
    yaxis = list(title = "income Distribution", zeroline = FALSE),
    xaxis = list(title = "Age Group"),
    title = "Income Distribution vs. Age Group in Sweden 2016"
  )
Analysis: As expected, the bulk of the young earn less than adults and seniors. The minima of seniors and adults are close, but the mean of seniors is the highest among all classes. All three classes have a single mode.
3. Surface Plot showing the distribution of wealth
# Interpolate Senior income onto a regular grid over (Young, Adult) income;
# duplicate = "mean" averages observations that share the same (x, y) point.
smoothed_surface <- interp(
  income_data_recasted$Young,
  income_data_recasted$Adult,
  income_data_recasted$Senior,
  duplicate = "mean"
)

# interp() returns z[i, j] at (x[i], y[j]), whereas a plotly surface reads the
# rows of z along y and the columns along x, so z is transposed to keep the
# Young/Adult axes the right way round.
# Axis titles of a 3D trace live under `scene`; the former top-level
# xaxis/yaxis settings do not apply to a surface plot.
plot_ly(
  x = ~smoothed_surface$x,
  y = ~smoothed_surface$y,
  z = ~t(smoothed_surface$z),
  type = "surface"
) %>%
  layout(
    title = "Surface Plot of Income Distribution between Young, Adult and Senior",
    scene = list(
      xaxis = list(title = "Young Income"),
      yaxis = list(title = "Adult Income", zeroline = FALSE),
      zaxis = list(title = "Senior Income")
    )
  )
Analysis: We see an almost linear, increasing relationship between the incomes of Young and Adult and the income of Senior. We also think a linear regression would be a good fit for this dataset.
4. Choropleth of Income distribution
# Split the region label into its leading code, the region name and the
# trailing type word (presumably "<code> <name> <type>" -- TODO confirm against
# Income_Data.csv).
# separate(..., " ") cut multi-word names after their first word (tidyr warned
# "Additional pieces discarded in 1 rows [12]"), so that region could not match
# NAME_1 and inner_join() dropped it. The regex keeps everything between the
# first and the last token as the region name.
income_data_recasted$region <- as.character(income_data_recasted$region)
income_data_recasted <- income_data_recasted %>%
  tidyr::extract(
    region, c("region_no", "region", "type"),
    regex = "^(\\S+)\\s+(.+)\\s+(\\S+)$"
  )

# Attach the income columns to the county geometries; inner_join() keeps only
# counties whose NAME_1 matches a parsed region name.
rds <- inner_join(
  x = rds[, c("NAME_1", "geometry")],
  y = income_data_recasted,
  by = c("NAME_1" = "region")
)

# plotly: one polygon trace per county, coloured by income.
plot_ly() %>%
  add_sf(
    data = rds, split = ~NAME_1, color = ~Young,
    showlegend = FALSE, alpha = 1, type = "scatter"
  ) %>%
  layout(title = "Choropleth of Income of Young")
plot_ly() %>%
  add_sf(
    data = rds, split = ~NAME_1, color = ~Adult,
    showlegend = FALSE, alpha = 1, type = "scatter"
  ) %>%
  layout(title = "Choropleth of Income of Adult")
Analysis: We see that people in Stockholm and Gothenburg make more money than the rest of Sweden; this may also be due to the higher cost of living. We find that only in the case of Adults does Uppsala show a higher income than the others.
5. GPS Visualizer
# Coordinates and labels of the single point to highlight on the map.
latitude <- 58.409814
longitude <- 15.624525
name <- "Linkoping"
desc <- "Linköping, Östergötlands län, SE"
our_location <- data.frame(latitude, longitude, name, desc)

# Redraw the "Young" income choropleth and overlay the location as a red marker.
young_income_map <- plot_ly() %>%
  add_sf(
    data = rds, split = ~NAME_1, color = ~Young,
    showlegend = FALSE, alpha = 1, type = "scatter"
  )
young_income_map <- young_income_map %>%
  layout(title = "Choropleth of Income of Young")
young_income_map %>%
  add_markers(
    data = our_location, y = ~latitude, x = ~longitude,
    color = I("red"), text = "Linkoping"
  )
Analysis: We are located at the red dot on the map.
Appendix
knitr::opts_chunk$set(echo = FALSE)
library(data.table)
library(dplyr)
library(tidyr)
library(plotly)
library(ggplot2)
library(akima)
library(sf)
# The Mapbox access token must not be hard-coded: this appendix is published
# with the knitted report, which exposes the token to every reader. It is read
# from the MAPBOX_TOKEN environment variable instead (for example set in
# ~/.Renviron); fail early with a clear message when it is missing.
if (!nzchar(Sys.getenv("MAPBOX_TOKEN"))) {
  stop(
    "MAPBOX_TOKEN is not set. Add `MAPBOX_TOKEN=<your token>` to ~/.Renviron ",
    "and restart R before knitting this report.",
    call. = FALSE
  )
}
knitr::opts_chunk$set(echo = TRUE)
# Load the mosquito occurrence records from the working directory.
mosquito_data <- read.csv("aegypti_albopictus.csv")

# Scatter map of every mosquito record from 2004 and 2013, one trace per
# species (VECTOR).
# `mode` and `hoverinfo` only accept plotly flag values; 'scattermapbox' is a
# trace type and 'COUNTRY' is a column name, so neither was a valid flag.
# The country name is now supplied through `text` and shown on hover with
# hoverinfo = "text".
mosquito_data %>%
  filter(YEAR %in% c("2004", "2013")) %>%
  select(VECTOR, COUNTRY, X, Y) %>%
  plot_mapbox(
    lon = ~X, lat = ~Y, mode = "markers",
    text = ~COUNTRY, hoverinfo = "text",
    split = ~VECTOR, width = 800, height = 800
  ) %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4)
  )
# Build a bubble map of record locations for one species in one year.
#   vector_name: value of the VECTOR column to keep.
#   year:        value of the YEAR column to keep (passed as a string, matching
#                the comparisons used elsewhere in this script).
# Returns a plotly mapbox object whose marker size encodes the number of
# records at each (COUNTRY, X, Y) location.
plot_vector_year <- function(vector_name, year) {
  mosquito_data %>%
    # `!!` forces the function arguments, so a data column with the same name
    # can never shadow them.
    filter(YEAR == (!!year), VECTOR == (!!vector_name)) %>%
    group_by(COUNTRY, X, Y) %>%
    summarise(count = n()) %>%
    ungroup() %>%
    plot_mapbox(
      lon = ~X, lat = ~Y, mode = "markers", size = ~count,
      # VECTOR is no longer a column after summarise() and 'VECTOR' is not a
      # valid hoverinfo flag; show country and count through `text` instead.
      text = ~paste0(COUNTRY, ": ", count), hoverinfo = "text"
    ) %>%
    layout(
      title = paste(vector_name, "Mosquito Population vs. Country in", year)
    )
}

p1 <- plot_vector_year("Aedes aegypti", "2004")
p2 <- plot_vector_year("Aedes aegypti", "2013")
p3 <- plot_vector_year("Aedes albopictus", "2004")
p4 <- plot_vector_year("Aedes albopictus", "2013")
p1
p2
p3
p4
# World choropleth of the number of mosquito records per country.
# COUNTRY_ID is passed as the `locations` key and COUNTRY as the hover text.
# plotly documents its projection type names in lower case, so the previous
# 'Equirectangular' spelling was not a valid value.
mosquito_data %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  plot_geo(width = 1200, height = 800) %>%
  add_trace(
    z = ~count, text = ~COUNTRY, locations = ~COUNTRY_ID,
    color = ~count, colors = "Purples"
  ) %>%
  colorbar(title = "Mosquito Population") %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    geo = list(projection = list(type = "equirectangular"))
  )
# World choropleth of log(record count) per country, equirectangular projection.
# The log transform is applied to both `z` and `color` so the fill and the
# colour bar use the same scale.
# plotly documents its projection type names in lower case, so the previous
# 'Equirectangular' spelling was not a valid value.
mosquito_data %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  plot_geo(width = 1200, height = 800) %>%
  add_trace(
    z = ~log(count), text = ~COUNTRY, locations = ~COUNTRY_ID,
    color = ~log(count), colors = "Purples"
  ) %>%
  colorbar(title = "Mosquito Population in Log") %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    geo = list(projection = list(type = "equirectangular"))
  )
# World choropleth of log(record count) per country, conic equal-area
# projection.
# plotly documents its projection type names in lower case. The previous
# 'Conic equal area' spelling was not a valid value, so the requested
# projection was presumably never applied.
mosquito_data %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  plot_geo(width = 1200, height = 800) %>%
  add_trace(
    z = ~log(count), text = ~COUNTRY, locations = ~COUNTRY_ID,
    color = ~log(count), colors = "Purples"
  ) %>%
  colorbar(title = "Mosquito Population in Log") %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    geo = list(projection = list(type = "conic equal area"))
  )
# Bin longitude (X) and latitude (Y) into 100 equal-width intervals each;
# labels = FALSE stores the integer bin index instead of an interval label.
mosquito_data$X_cut <- cut_interval(mosquito_data$X, n = 100, labels = FALSE)
mosquito_data$Y_cut <- cut_interval(mosquito_data$Y, n = 100, labels = FALSE)

# For Brazil in 2013, collapse every (species, X bin, Y bin) cell to a single
# point at the mean coordinate and colour it by the number of records.
# The former `add_trace(z = ~count)` call was removed: scattermapbox traces
# have no `z` attribute and the call added a second set of traces.
# `hoverinfo = 'count'` was not a valid flag either, so the count is now shown
# through `text`.
mosquito_data %>%
  filter(YEAR == "2013" & COUNTRY == "Brazil") %>%
  group_by(COUNTRY, VECTOR, X_cut, Y_cut) %>%
  summarise(X_mean = mean(X), Y_mean = mean(Y), count = n()) %>%
  ungroup() %>%
  plot_mapbox(
    lon = ~X_mean, lat = ~Y_mean, mode = "markers",
    split = ~VECTOR, color = ~count,
    text = ~paste0(VECTOR, ": ", count), hoverinfo = "text",
    width = 800, height = 800
  ) %>%
  layout(
    title = "Mosquito Population vs. Brazil",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4)
  )
# Inputs for the income assignment: the income table and the county geometries.
income_data <- read.csv("Income_Data.csv")
rds <- readRDS("gadm36_SWE_1_sf.rds")

# Wide table with one row per region and one 2016-income column per age band,
# with the age-band columns renamed to short labels.
income_data_recasted <- dcast(income_data, region ~ age, value.var = "X2016")
setnames(
  income_data_recasted,
  old = c("18-29 years", "30-49 years", "50-64 years"),
  new = c("Young", "Adult", "Senior")
)

# Short age-group label on the long table: the two younger bands are matched
# explicitly and every other band is labelled "Senior".
is_young <- income_data$age == "18-29 years"
is_adult <- income_data$age == "30-49 years"
income_data$age_group <- ifelse(
  is_young, "Young",
  ifelse(is_adult, "Adult", "Senior")
)
# Violin plot of 2016 income (column X2016) per age group, one violin per
# group, with the embedded box plot and mean line switched on.
# TRUE/FALSE replace T/F, which are ordinary variables that can be reassigned.
income_data %>%
  plot_ly(
    x = ~age_group, y = ~X2016, type = "violin", split = ~age_group,
    box = list(visible = TRUE), meanline = list(visible = TRUE)
  ) %>%
  layout(
    yaxis = list(title = "income Distribution", zeroline = FALSE),
    xaxis = list(title = "Age Group"),
    title = "Income Distribution vs. Age Group in Sweden 2016"
  )
# Interpolate Senior income onto a regular grid over (Young, Adult) income;
# duplicate = "mean" averages observations that share the same (x, y) point.
smoothed_surface <- interp(
  income_data_recasted$Young,
  income_data_recasted$Adult,
  income_data_recasted$Senior,
  duplicate = "mean"
)

# interp() returns z[i, j] at (x[i], y[j]), whereas a plotly surface reads the
# rows of z along y and the columns along x, so z is transposed to keep the
# Young/Adult axes the right way round.
# Axis titles of a 3D trace live under `scene`; the former top-level
# xaxis/yaxis settings do not apply to a surface plot.
plot_ly(
  x = ~smoothed_surface$x,
  y = ~smoothed_surface$y,
  z = ~t(smoothed_surface$z),
  type = "surface"
) %>%
  layout(
    title = "Surface Plot of Income Distribution between Young, Adult and Senior",
    scene = list(
      xaxis = list(title = "Young Income"),
      yaxis = list(title = "Adult Income", zeroline = FALSE),
      zaxis = list(title = "Senior Income")
    )
  )
# Split the region label into its leading code, the region name and the
# trailing type word (presumably "<code> <name> <type>" -- TODO confirm against
# Income_Data.csv).
# separate(..., " ") cut multi-word names after their first word, so such a
# region could not match NAME_1 and inner_join() dropped it. The regex keeps
# everything between the first and the last token as the region name.
income_data_recasted$region <- as.character(income_data_recasted$region)
income_data_recasted <- income_data_recasted %>%
  tidyr::extract(
    region, c("region_no", "region", "type"),
    regex = "^(\\S+)\\s+(.+)\\s+(\\S+)$"
  )

# Attach the income columns to the county geometries; inner_join() keeps only
# counties whose NAME_1 matches a parsed region name.
rds <- inner_join(
  x = rds[, c("NAME_1", "geometry")],
  y = income_data_recasted,
  by = c("NAME_1" = "region")
)

# plotly: one polygon trace per county, coloured by income.
plot_ly() %>%
  add_sf(
    data = rds, split = ~NAME_1, color = ~Young,
    showlegend = FALSE, alpha = 1, type = "scatter"
  ) %>%
  layout(title = "Choropleth of Income of Young")
plot_ly() %>%
  add_sf(
    data = rds, split = ~NAME_1, color = ~Adult,
    showlegend = FALSE, alpha = 1, type = "scatter"
  ) %>%
  layout(title = "Choropleth of Income of Adult")
# Coordinates and labels of the single point to highlight on the map.
latitude <- 58.409814
longitude <- 15.624525
name <- "Linkoping"
desc <- "Linköping, Östergötlands län, SE"
our_location <- data.frame(latitude, longitude, name, desc)

# Redraw the "Young" income choropleth and overlay the location as a red marker.
young_income_map <- plot_ly() %>%
  add_sf(
    data = rds, split = ~NAME_1, color = ~Young,
    showlegend = FALSE, alpha = 1, type = "scatter"
  )
young_income_map <- young_income_map %>%
  layout(title = "Choropleth of Income of Young")
young_income_map %>%
  add_markers(
    data = our_location, y = ~latitude, x = ~longitude,
    color = I("red"), text = "Linkoping"
  )